****************************************
** Clean up details of Midline survey done in Summer of 2014.
    ** TODO: Keep- Pick variables to merge with Baseline
    ** TODO: Merge- merge in key info from edu & HH roster modules.
    ** TODO: Re-label- Get varnames to match between baseline & midline
    ** TODO: Do topcoding, bottom-coding, etc. (Be consistent with Baseline_cleanup.do)
    ** TODO: Write to CSV/dta.
    ** elliottmcollins@berkeley.edu; 2014-11-26
****************************************

* Start from an empty session: drop the dataset in memory and any Stata matrices.
clear
clear matrix
* Turn off --more-- paging so the script runs without stopping for keypresses.
set more off
* Elliott
**cd Z:\home\elliott\Dropbox\1Spring2016\TUP\data

********************************************************
** Collapse HHroster vars for merge with ssbusiness_survey
********************************************************
    * Load the midline household roster and collapse it to one row per rid
    * (household size, age/sex head-counts, respondent age), saved as Midline/HHvars.
    * Forward slashes are used in the path, matching the save at the end of this block.
    use Midline/household_roster, clear

    * Rows with ln==1 and s12 missing get s12 = 1.
    replace s12 = 1 if ln==1 & s12==.

    * Age/sex indicators built from s14 (age) and s13 (sex code).
    * A missing value compares greater than any number in Stata, so the "<=" and "<"
    * tests are already false (0) when s14 is missing.
    gen ischild      = s14<=18
    gen ischild2     = s14<=15
    gen issmallchild = s14<5
    gen girls        = (s14<=15 & s13==0)
    gen boys         = (s14<=15 & s13==1)
    * NOTE(review): s13==0 is labelled "girls" above but "men" below (and s13==1 is
    * "boys" above but "women" below). One of the two pairs is mislabelled; confirm the
    * s13 coding against the codebook. Left unchanged here.
    * FIX: "s14>15" alone is TRUE when s14 is missing, which counted members with
    * unknown age as adults. Require a non-missing age.
    gen men          = (s14>15 & !missing(s14) & s13==0)
    gen women        = (s14>15 & !missing(s14) & s13==1)

    * age is filled only on the ln==1 row, so the (sum) in the collapse below returns
    * that single value per rid (or 0 if it is missing, or a doubled value if an rid
    * still has two ln==1 rows).
    gen age = .
    replace age = s14 if ln==1

    * fix ID Errors & Duplicate (Has to be done separately in each file)
        * This may need modification for the sake of household_roster.dta
        replace cln = 109 if rid==1102
        replace cln = 287 if rid==1807
        replace cln = 455 if rid==1459
        replace cln = 484 if rid==1577
        replace cln = 598 if rid==1984
        replace cln = 466 if rid==1464
        replace cln = 2   if v1==43

        replace rid = 1187 if v1==43
        replace rid = 1601 if rid==160
        replace rid = 1571 if rid== 1520 & cln == 448
        replace rid = 1651 if rid== 1647 & cln == 245
        replace rid = 1715 if rid== 1716 & cln == 239
        replace rid = 1778 if rid== 1775 & cln == 204
        replace rid = 2246 if rid== 2046 & cln == 729
        replace rid = 1444 if rid== 2111 & cln == 429
        * One duplicate has gotten through unresolved...
        * Unlike the survey file, no enumerator condition is applied here, so every
        * roster row with this cln/rid pair is dropped.
        drop if cln==704 & rid == 2230
        * & enu=="AKA ROSE"
        drop if rid == 602

    * This is going to get some weird answers for the duplicate ids fixed above. Maybe port some of those entry error corrections
    * hh_size is the largest ln within rid; the remaining outputs are within-rid sums.
    collapse (max) hh_size=ln (sum) age girls boys men women child_total=ischild  children=ischild2 smallchildren=issmallchild, by(rid) //*** NEW
    save Midline/HHvars, replace

***********************************************
** Clean & name vars from ssbusiness_survey.dta
***********************************************
* Load the main midline survey, build a Stata date, apply the ID corrections, and
* merge in the household-level variables saved in Midline/HHvars.
* Paths use forward slashes, consistent with the other Midline/ paths in this file.
use Midline/ssbusiness_survey, clear

* Format Date
    * The first 10 characters of idate are parsed as year-month-day.
    gen date_m = date(substr(idate,1,10),"YMD")
    * gen D = date(date_m,"YMD")
    * NOTE(review): the original wrote "order date"; assuming it meant the date_m
    * variable created above, the full name is used so the command does not depend
    * on variable-name abbreviation.
    order date_m
    format date_m %td

* fix ID Errors & Duplicate
    * This may need modification for the sake of household_roster.dta
    replace cln = 109 if rid==1102
    replace cln = 287 if rid==1807
    replace cln = 455 if rid==1459
    replace cln = 484 if rid==1577
    replace cln = 598 if rid==1984
    replace cln = 466 if rid==1464
    replace cln = 2   if v1==43

    replace r_id= 1187 if v1==43
    replace rid = 1187 if v1==43
    replace rid = 1601 if rid== 160
    replace rid = 1571 if rid== 1520 & cln == 448
    replace rid = 1651 if rid== 1647 & cln == 245
    replace rid = 1715 if rid== 1716 & cln == 239
    replace rid = 1778 if rid== 1775 & cln == 204
    replace rid = 2246 if rid== 2046 & cln == 729
    replace rid = 1444 if rid== 2111 & cln == 429
    * One duplicate has gotten through unresolved...
    drop if cln==704 & rid == 2230 & enu=="AKA ROSE"
    drop if rid == 602

** Merge in HHvars
    * 1:1 merge requires rid to be unique in both files; the drops above remove the
    * known duplicates. Unmatched rows from either side are retained.
    merge 1:1 rid using Midline/HHvars
    * _merge is written out in full rather than abbreviated as _m.
    tab _merge
    rename _merge merge_HH

* Which variables to keep for analysis
** OR you can keep all by commenting out the "keep" line. That will take a little debugging...
    * s3 Self-employment
    * s7*- Assets
    * s8- Cash savings
    * s8b- food savings
    * s9*- HH Decision Making
    * s15*- Number of people eating
    * s16a_* s18* -- Consumption
    * HH vars- 
    *   ____*-- land_total
    *   s101*-- Transfers received
    *   s102*-- Transfers given
    *   s11_*-- Confidence
    *   s21_*-- Food Security
    *   s22_*-- Conflict Questions
    *   s25_*-- Female Autonomy
    **  exclude res fname husband for anonymity
    * Restrict the dataset to the analysis variables. Anything not listed here
    * (including date_m and merge_HH created earlier) is dropped at this point.
    keep rid cln enu                                                          /// identifiers and enumerator
         hh_size child_total girls boys men women children smallchildren age  /// merged in from HHvars
         s3* s5* s6* s7a* s8a* s8b* s9* s101* s102* s7b* s15*                 /// raw survey modules
         s16a_* s18* s20* s21* s22* s11* s25*

    
** Re-label variables to match the baseline data. The key for all of these is "varmap.csv", the spreadsheet that maps raw-data variable names to
** meaningful variable names in baseline and midline.
* Runs the do-file Midline/rename_vars. The code after this point uses names that are not in the
* keep list (c_*, asset_val*, transfers_get1_m, idno), so they are presumably created by that do-file -- TODO confirm.
do Midline/rename_vars

** Clean-up values

****************************************
    * Clean-up all expenditure & asset data:
    * Save raw values * Set blanks to zero
    * Current Top-code Method: 99th percentile
****************************************

* For every expenditure (c_*) and asset-value (asset_val*) variable:
*   1. keep an untouched copy in raw_<name>;
*   2. recode system-missing (.) to zero;
*   3. top-code at that variable's own 99th percentile, computed after step 2.
foreach item of varlist c_* asset_val* {
    gen raw_`item' = `item'
    replace `item' = 0 if `item'==.
    quietly: sum `item', d
    * FIX: missing values sort above every number, so "> r(p99)" is also true for
    * extended missing codes (.a-.z). Step 2 leaves those codes alone, and the old
    * guard "!= ." is true for them too, so they were overwritten with the 99th
    * percentile. !missing() excludes every missing code.
    replace `item' = r(p99) if `item'>r(p99) & !missing(`item')
    }

** Flag the few surveys whose consumption section is unusable -- either no zeros at all or nothing but zeros
** (enumerators were confused, and it is more informative to just eliminate that data).
    * Argument lists for the two checks, kept in locals so each condition reads on one line.
    * NOTE(review): the second list has no _m suffix while the loop range below ends in
    * c_alcohol_m; this presumably resolves through variable-name abbreviation -- confirm.
    local all_positive_list c_maize_m, c_rice_m, c_bread_m,  c_meat_m, c_poultry_m, c_fish_m, c_egg_m
    local all_below_one_list c_cereals, c_beans, c_oil, c_salt, c_sugar, c_meat, c_fish, c_egg, c_milk, c_vegetables, c_fruit, c_spices, c_alcohol

    * Smallest listed value above zero: none of those items was reported as zero.
    generate cons_issue = 1 if min(`all_positive_list') > 0
    * Largest listed value below one: every one of those items is below one.
    replace cons_issue = 1 if max(`all_below_one_list') < 1

    display "SURVEYS WITH BOTCHED CONSUMPTION SECTIONS:"
    tabulate cons_issue

    * Blank out every variable in the c_maize_m-c_alcohol_m range for flagged surveys.
    foreach food of varlist c_maize_m-c_alcohol_m {
        replace `food' = . if cons_issue == 1
    }

** Build the meals-served variable served_3days as the row total of all s15* variables
    * Recode system-missing to zero in each s15* variable first, so the stored
    * values themselves (not only the total) read as zero.
    foreach meal_var of varlist s15* {
        replace `meal_var' = 0 if `meal_var' == .
    }
    egen served_3days = rowtotal(s15*)

** Delete some obviously wrong values
    * Set transfers_get1_m to missing when it exceeds 700000. Values that are already
    * missing also satisfy the ">" test in Stata and simply stay/become system-missing.
    replace transfers_get1_m =. if transfers_get1_m > 700000

* Merge in Treatment Variables
* One-to-one merge on idno with the Tgroups dataset, then save the cleaned midline file.
sort idno
merge 1:1 idno using Tgroups
* Drop rows that exist only in Tgroups (no midline survey); survey rows without a
* Tgroups match are kept. _merge is written out in full rather than abbreviated as _m.
drop if _merge==2
rename _merge merge_Tgroups_m

* Forward slash in the path, consistent with the other Midline/ paths in this file.
save Midline/TUP_midline, replace

